#Source: https://www.fractracker.org/map/us/west-virginia/
#Accessed 25 March 2019

#clean up
rm(list=ls())
options(scipen = 8)
set.seed(715130425,kind="Mersenne-Twister")

#load libraries
library(spatstat)
library(maptools)
library(rgdal)
library(maps)
library(foreign)
library(mgcv)
library(spdep)
library(car)
library(geoR)
library(spBayes)

#load map polygon data
wells<-readOGR(dsn="WVMarcellusWellsCompleted_Permitted_102014.shp")
wells.data<-wells@data

#plot data
#pdf("wvWells.pdf")
map(database="state",region="West Virginia")
points(x=wells.data$LonSrf,y=wells.data$LatSrf,pch=20,cex=.25)
#dev.off()

#data features and descriptive statistics
slotNames(wells)
head(wells@data)
dim(wells@data)
table(wells@data$WellType)#2095 gas, 367 both oil & gas, and 12 gas with oil show [2474]
summary(wells@data)

#subset data and eliminate duplicates
wells.data.subset <- wells.data[c(which(wells.data$GProd2012>0)),]
wells.data.subset<-wells.data.subset[order(wells.data.subset$UTMESrf),]
table(table(wells.data.subset$APINum))
table(wells.data.subset$APINum)[order(-1*table(wells.data.subset$APINum))][1:12]
duplicates<-names(table(wells.data.subset$APINum)[order(-1*table(wells.data.subset$APINum))][1:12])
wells.data.subset[wells.data.subset$APINum%in%duplicates,]
conditions<-(!wells.data.subset$APINum%in%duplicates) | (wells.data.subset$APINum%in%duplicates & grepl("Original Location",wells.data.subset$Suffix)) | (wells.data.subset$APINum==4705101472 & wells.data.subset$Suffix=="Deviated Worked Over")
wells.data.subset.2<-subset(wells.data.subset,subset=conditions)
table(table(wells.data.subset.2$APINum),useNA="always")
wells.data.subset.2<-wells.data.subset.2[order(wells.data.subset.2$UTMESrf,wells.data.subset.2$UTMNSrf),]
wells.data.subset.2$copy<-NA
for(i in 2:dim(wells.data.subset.2)[1]){
  wells.data.subset.2$copy[i]<-as.numeric(wells.data.subset.2$UTMESrf[i]==wells.data.subset.2$UTMESrf[i-1] & wells.data.subset.2$UTMNSrf[i]==wells.data.subset.2$UTMNSrf[i-1])
}
table(wells.data.subset.2$copy)
wells.data.subset.3<-subset(wells.data.subset.2,subset=copy==0)
clean.wells<-wells.data.subset.3
row.names(clean.wells) <- seq(length = nrow(clean.wells))

#examine the variables of interest
hist(clean.wells$GProd2012)
hist(log(clean.wells$GProd2012))
hist(clean.wells$Elevation)
hist(log(clean.wells$Elevation))
hist(clean.wells$RockPres)
hist(log(clean.wells$RockPres))
summary(clean.wells$GProd2012)
summary(clean.wells$Elevation)
summary(clean.wells$RockPres)

#create logged variables
clean.wells$logElevation <- log(clean.wells$Elevation)
clean.wells$logGProd2012 <- log(clean.wells$GProd2012)

#preliminary models
log.lin<-lm(logGProd2012~Elevation+RockPres,data=clean.wells);summary(log.lin)
log.log<-lm(logGProd2012~logElevation+RockPres,data=clean.wells);summary(log.log)
plot(y=clean.wells$logGProd2012,x=clean.wells$Elevation)
plot(y=clean.wells$logGProd2012,x=clean.wells$RockPres)

#run model over all data for cache
prior.length<-30
global.low.phi<-5000
global.high.phi<-30000
global.low.tau<-.05
global.high.tau<-.5

bayes.test <- krige.bayes(coords=cbind(clean.wells$UTMESrf, clean.wells$UTMNSrf), data=clean.wells$logGProd2012, locations=NULL,
                          model = model.control(trend.d =~clean.wells$logElevation + clean.wells$RockPres, cov.model = "exponential"), 
                          prior = prior.control(beta.prior = "flat", 
                          		sigmasq.prior="uniform",
                          		tausq.rel.prior = "uniform", tausq.rel.discrete = seq(from =global.low.tau, to = global.high.tau, length = prior.length), 
                                phi.prior="uniform", phi.discrete = seq(global.low.phi,global.high.phi, length = prior.length)))

plot(y=bayes.test$posterior$phi$distribution,x=names(bayes.test$posterior$phi$distribution),type='l',xlab="Range",ylab="Density")
plot(y=bayes.test$posterior$tausq.rel$distribution,x=names(bayes.test$posterior$tausq.rel $distribution),type='l',xlab="Nugget/Partial Sill",ylab="Density")
bayes.test$posterior$phi$distribution
bayes.test$posterior$tausq.rel$distribution
apply(bayes.test$posterior$sample,2,FUN=quantile,c(.5,.05,.95))

###DEFINE FUNCTIONS FOR BOOTSTRAP###
# Bayes model function
bayes_model_function <- function(iterations = 1, sample = 50, 
                                 dataset = clean.wells, dv = clean.wells$logGProd2012,
                                 form = ~clean.wells$logElevation + clean.wells$RockPres,
                                 x = clean.wells$UTMESrf, y = clean.wells$UTMNSrf, 
                                 low.tau = global.low.tau, high.tau = global.high.tau,
                                 low.phi = global.low.phi, high.phi = global.high.phi, cache =NULL) {
  print(nrow(cbind(x,y)))
  print(nrow(dataset))
  print(length(dv))
  minSampleSize <- length(attr(terms(form), "variables")) + 4
  if (sample < minSampleSize) {
    stop(paste0("bayes_model_function(): Sample size must be at least ", minSampleSize))
  }
  
  sampleSize <- sample
  bayes.percent <- sampleSize / nrow(dataset)
  
  # Caching functionality: if multiple iterations are being run, the `cache` parameter
  # can be set to skip repeat runs of krige.bayes on the entire dataset.
  if (is.null(cache)) {
    print("*** Running krige.bayes with the full dataset first")
    print(Sys.time())
    bayes.test <- krige.bayes(coords = cbind(x, y), data = dv, locations = NULL,
                              model = model.control(trend.d = trend.spatial(form, dataset), 
                                                    cov.model = "exponential"), 
                           prior = prior.control(beta.prior = "flat", 
                          		sigmasq.prior="uniform",
                          		tausq.rel.prior = "uniform", tausq.rel.discrete = seq(from =global.low.tau, to = global.high.tau, length = prior.length), 
                                phi.prior="uniform", phi.discrete = seq(global.low.phi,global.high.phi, length = prior.length)),
                              output = output.control(messages = FALSE))
  } else {bayes.test <- cache}
  
  # Collate results from krige.bayes on the full dataset.
  bayes.results <- bayes.test$posterior$sample
  columns <- dim(bayes.results)[2]
  bayes.whole.percentile <- t(apply(bayes.results, 2, quantile, c(0.5, 0.05, 0.95)))
  whole.bayes.mean.mat <- matrix(apply(bayes.results, 2, mean), ncol = columns, nrow = 1)
  
  # Print results from full data model
    print("*** Bayes whole data summary, posterior and summary", quote = FALSE)
    print(summary(bayes.results))
    
    print("*** Whole data analysis: median, 5th, and 95th percentile (bayes.whole.percentile)", quote = FALSE)
    print(bayes.whole.percentile)
  
  # Output matrix for iterations
  all.bayes <- matrix(NA, nrow = iterations, ncol = columns)
  
  # Running the iterations
  for (i in 1:iterations) {
    sample.indices <- sample(x = nrow(dataset), size = sampleSize)
    
    bayes.rand <- dataset[sample.indices, ]
    x.sub <- x[sample.indices]
    y.sub <- y[sample.indices]
    dv.sub <- dv[sample.indices]
    form.sub <- ~bayes.rand$logElevation + bayes.rand$RockPres 
    
    bayes.iter <- krige.bayes(coords = cbind(x.sub, y.sub), data = dv.sub, 
                              locations = NULL,
                              model = model.control(trend.d = trend.spatial(form.sub, bayes.rand),  
                                                    cov.model = "exponential"),
                          prior = prior.control(beta.prior = "flat", 
                          		sigmasq.prior="uniform",
                          		tausq.rel.prior = "uniform", tausq.rel.discrete = seq(from =global.low.tau, to = global.high.tau, length = prior.length), 
                                phi.prior="uniform", phi.discrete = seq(global.low.phi,global.high.phi, length = prior.length)),
                              output = output.control(messages = FALSE))
    
    bayes.sub.results <- as.matrix(bayes.iter$posterior$sample)
    all.bayes[i, ] <- apply(bayes.sub.results,2,mean)
  }
  
  print(paste("***dataset size =", dim(dataset)[1]), quote = FALSE)
  print(paste("***", iterations, "iterations, n =", sampleSize, "and p =", bayes.percent), quote = FALSE)
  
  # Calculate 50th, 5th, and 95th percentiles of combined matrix
  bayes.all.percentile <- t(apply(all.bayes, 2, quantile, c(0.5, 0.05, 0.95)))
  print("*** Bootstrap results: median, 5th, and 95th percentile (bayes.whole.percentile)", quote = FALSE)
  print(bayes.all.percentile)
  
  # Combine means of whole dataset with average.bayes.mat
  print("***Bayes mean matrix (average.bayes.mat)", quote = FALSE)
  average.bayes.mat <- matrix(apply(all.bayes, 2, mean), ncol = columns, nrow = 1)
  average.bayes.mat <- rbind(average.bayes.mat, whole.bayes.mean.mat)
  rownames(average.bayes.mat) <- c("all.bayes Means", "Whole Data Set Means")
  print(average.bayes.mat)
  
  # Find difference
  print("***Whole data set means vs. all.bayes Means difference (bayes.diff)", quote = FALSE)
  bayes.diff <- diff(average.bayes.mat)
  rownames(bayes.diff) <- ("Diff")
  print(bayes.diff)
  
  # Find PMAD
  print("***Percent mean absolute differences (bayes.pmad):", quote = FALSE)
  iterationMeans <- average.bayes.mat["all.bayes Means", ]
  wholeMeans <- average.bayes.mat["Whole Data Set Means", ]
  bayes.pmad <- 100 * abs((iterationMeans - wholeMeans) / wholeMeans)
  print(bayes.pmad)
  
  # Print finish
  print(paste("***bayes_model_function finished at", Sys.time()), quote = FALSE)
  print("-----------------------------------------------------------", quote = FALSE)
  
  return(list(all.bayes = all.bayes,
              bayes.all.percentile = bayes.all.percentile,
              bayes.results = bayes.results,
              bayes.whole.percentile = bayes.whole.percentile,
              average.bayes.mat = average.bayes.mat,
              bayes.diff = bayes.diff,
              bayes.pmad = bayes.pmad,
              cache = bayes.test))
}

# Loop function for Bayes
bayes_iterations <- function(iterations = NULL, sample = 50, cache = NULL) {
  result <- NULL
  
  if (sample <= 1) {
    percent <- sample * 100
    print(paste("Starting bayes_iterations (", percent, "%),", iterations, 
                "iterations"), quote = FALSE)
  } else {
    print(paste("Starting bayes_iterations (n =", sample, "),", iterations, 
                "iterations"), quote = FALSE)
  }
  
  print(Sys.time())
  
  for (i in 1:length(iterations)) {
    iteration <- bayes_model_function(iterations = iterations[i], sample = sample, cache = cache)
    
    #if cache is empty, fill it with the full model from the first run
    if (is.null(cache)) {
      cache <- iteration$cache
    }
    
    #if there is no result matrix, fill it in
    if (is.null(result)) {
      result <- matrix(NA, nrow = length(iterations), ncol = 6)
    }
    
    result[i, ] <- iteration$bayes.pmad
  }
  
  if (sample <= 1) {
    percent <- sample * 100
    print(paste("Finishing bayes_iterations (", percent, "%),", iterations, 
                "iterations"), quote = FALSE)
  } else {
    print(paste("Finishing bayes_iterations (n =", sample, "),", iterations, 
                "iterations"), quote = FALSE)
  }
  
  return(list(result = result, cache = cache))
}


# Exporter function. Writes CSV and plots
export.bayes <- function(plot = NULL, iterations = NULL, sample = 50, directory = NULL) {
  names <- c("beta0", "beta1", "beta2", "sigmasq", "phi", "tausq.rel")
  
  colnames(plot) <- names
  rownames(plot) <- c(iterations)
  
  if (sample <= 1) {
    percent <- sample * 100
    
    csvFilename <- paste0("bayes.", percent, "pct.csv")
    yLabel <- paste0("bayes.pmad (", percent, "%)")
  } else {
    csvFilename <- paste0("bayes.", sample, "n.csv")
    yLabel <- paste0("bayes.pmad (n = ", sample, ")")
  }
  
  if (!is.null(directory)) {
    csvFilename <- paste0(directory, "/", csvFilename)
  }
  
  write.csv(plot, csvFilename, row.names = TRUE)
  
  for (i in 1:length(names)) {
    if (sample <= 1) {
      pdfFilename <- paste0("bayes.", percent, "pct.", names[i], ".pdf")
    } else {
      pdfFilename <- paste0("bayes.", sample, "n.", names[i], ".pdf")
    }
    
    if (!is.null(directory)) {
      pdfFilename <- paste0(directory, "/", pdfFilename)
    }
    
    pdf(pdfFilename,width=4,height=4)
    par(las = 1)
    plot(iterations, plot[, i], xlab = "Iterations", ylab = yLabel, main = names[i], 
         type = "b")
    dev.off()
  }
  
}

# Looper function
permutation.bayes <- function(iterationClasses = NULL, sampleClasses = NULL,
                              exportDirectory = NULL, cache=NULL) {
  cache <- cache
  
  print(paste0("Starting program execution at ", Sys.time()), quote = FALSE)
  
  for (sample in sampleClasses) {
    programRun <- bayes_iterations(iterations = iterationClasses, sample = sample, cache = cache)
    
    export.bayes(plot = programRun$result, iterations = iterationClasses, sample = sample,
                 directory = exportDirectory)
    
    if (is.null(cache)) {
      cache <- programRun$cache
    }
  }
  
  print(paste0("Program execution completed at ", Sys.time()), quote = FALSE)
}

#RUN IT ALL
#iterationSizes <- c(1:20, 30, 40, 50)
iterationSizes <- c(50)
#sampleSizes <- c(50, 250, 500)
sampleSizes<-c(500)
permutation.bayes(iterationClasses = iterationSizes, sampleClasses = sampleSizes, exportDirectory = "POLSData", cache= bayes.test)

###graph the semivariogram###
#define semivariogram function
exponential.semivariogram<-function(d,nugget,partial.sill,decay){
	nugget+partial.sill*(1-exp(-decay*d))
}

#obtain empirical semivariogram of residuals
log.log<-lm(logGProd2012~logElevation+RockPres,data=clean.wells);summary(log.log)
resid.robust<-variog(coords=cbind(clean.wells$UTMESrf, clean.wells$UTMNSrf), data=log.log$residuals, estimator.type="modulus")

#make confidence bands
ruler<-seq(0,400000,length=500)
full.lower<-exponential.semivariogram(d=ruler,nugget=2.4428*0.0966,partial.sill=2.4428,decay=1/8448.2759)
full.upper<-exponential.semivariogram(d=ruler,nugget=3.7879*0.1586,partial.sill=3.7879,decay=1/14482.7586)
boot.lower<-exponential.semivariogram(d=ruler,nugget=2.3936*0.0845,partial.sill=2.3936,decay=1/13584.7414)
boot.upper<-exponential.semivariogram(d=ruler,nugget=3.6778*0.2923,partial.sill=3.6778,decay=1/24672.4569)

#pdf("wvSemivariogram.pdf")
plot(x=c(0,400000),y=c(0,5),xlab="Distance in Meters",ylab="Semivariance",type='n')
polygon(x=c(ruler,rev(ruler)), y=c(full.lower,rev(full.upper)),border=NA,col=rgb(.9,0,0,.3)) #full
polygon(x=c(ruler,rev(ruler)), y=c(boot.lower,rev(boot.upper)),border=NA,col=rgb(0,0,.9,.3))#boot
lines(x=ruler,y=exponential.semivariogram(d=ruler,nugget=2.9698*0.1276,partial.sill=2.9698,decay=1/11034.4828),col='red',lty=1,lwd=3)#full
lines(x=ruler,y=exponential.semivariogram(d=ruler,nugget=3.0144*0.1551,partial.sill=3.0144,decay=1/21501.7241),col='blue',lty=2,lwd=3)#boot
points(x=resid.robust$u,y=resid.robust$v)
legend(x=100000,y=1,legend=c("Full Data","Bootstrap"),lty=c(1,2),lwd=2,col=c("red","blue"))
#dev.off()

###OUTPUT RESULTS###
n50 <- read.csv(file = "POLSData/bayes.50n.csv")
n250 <- read.csv(file = "POLSData/bayes.250n.csv")
n500 <- read.csv(file = "POLSData/bayes.500n.csv")

iterations <- c(1:20, 30, 40, 50)

cols <- c("Beta 0", "Beta 1", "Beta 2", "Partial Sill", "Range", "Nugget/Partial Sill")
names <- c("Beta0", "Beta1", "Beta2", "PartialSill", "Range", "RelativeNugget")

for (i in 1:length(cols)) {
  col <- cols[i]
  max <- max(n50[, i + 1], n250[, i + 1], n500[, i + 1])
  
  jpeg(paste0("bayes", names[i], "combined.jpg"))
  par(las = 1)
  plot(x = iterations, y = n50[, i + 1], col = "#FF0000", ylim = range(c(0, max * 1.3)), xlim = range(c(1, 50)), xlab = "Iterations", ylab = "Percent Mean Absolute Difference", main = col)
  points(x = iterations, y = n50[, i + 1], col = "#FF0000", pch = 15)
  points(x = iterations, y = n250[, i + 1], col = "#00FF00", pch = 17)
  points(x = iterations, y = n500[, i + 1], col = "#0000FF", pch = 20)
  lines(x = iterations, y = n50[, i + 1], col = "#FF0000", lty = 6)
  lines(x = iterations, y = n250[, i + 1], col = "#00FF00", lty = 5)
  lines(x = iterations, y = n500[, i + 1], col = "#0000FF", lty = 1)
  
  legend(1, max * 1.3, legend = c("n = 50", "n = 250", "n = 500"), col = c("#FF0000", "#00FF00", "#0000FF"), lty = c(6, 5, 1), pch = c(15, 17, 20))
  
  dev.off()
}

